#####################################
##REPLICATION FOR MCGHEE, ET AL.,####
##"A PRIMARY CAUSE OF PARTISANSHIP"##
#####################################

##Table 2##

#Load Data#
library(Zelig)
library(sandwich)
# Point R at the folder that holds the replication CSV files.
setwd("XXXX") # ENTER DIRECTORY PATHNAME IN PLACE OF XXXX

# Read the two legislator-primaries files (file names end in "dem" and "rep");
# the first row of each file supplies the column names.
legyrd <- read.csv(file = "legislator primaries.dem.csv", header = TRUE)
legyrr <- read.csv(file = "legislator primaries.rep.csv", header = TRUE)

# Prepare one legislator-primaries data frame for the fixed-effects models.
# The same steps are applied, in this order, to each data frame:
#   1. build an election counter from `elec`: 0 for 1992, increasing by 1
#      for every two-unit step in `elec`;
#   2. drop `ideowt`, which is not used for this table (presumably so that
#      its missing values do not cost rows in step 3 -- confirm);
#   3. drop every row that has a missing value in any remaining column;
#   4. store `icpsrst` and the election counter as factors so the regressions
#      treat them as fixed effects.
prep_primary_data <- function(dat) {
  dat$yrcnt <- (dat$elec - 1992) / 2
  dat$ideowt <- NULL
  dat <- na.omit(dat)
  dat$icpsrst <- as.factor(dat$icpsrst)
  dat$yrcnt <- as.factor(dat$yrcnt)
  dat
}

legyrd <- prep_primary_data(legyrd)
legyrr <- prep_primary_data(legyrr)

##Model results: full diff-in-diff##
# Democrats
# Model 1: least-squares ("ls") fit of pred_np on semicl, semiop, open and
# nonpart, plus the icpsrst and yrcnt terms; robust standard errors are
# requested through the "vcovHAC" method.
z.out <- zelig(
  pred_np ~ semicl + semiop + open + nonpart + icpsrst + yrcnt,
  robust = list(method = "vcovHAC"),
  model = "ls",
  data = legyrd
)
summary(z.out)
# Number of rows in the data passed to the model.
nrow(legyrd)

# Model 2 (Democrats): same specification as Model 1 with pvote added as a
# regressor; least-squares ("ls") fit with "vcovHAC" robust standard errors.
z.out <- zelig(
  pred_np ~ semicl + semiop + open + nonpart + pvote + icpsrst + yrcnt,
  robust = list(method = "vcovHAC"),
  model = "ls",
  data = as.data.frame(legyrd)
)
summary(z.out)
# Number of rows in the data passed to the model.
nrow(legyrd)

# Republicans
# Model 1: least-squares ("ls") fit of pred_np on semicl, semiop, open and
# nonpart, plus the icpsrst and yrcnt terms; robust standard errors are
# requested through the "vcovHAC" method.
z.out <- zelig(
  pred_np ~ semicl + semiop + open + nonpart + icpsrst + yrcnt,
  robust = list(method = "vcovHAC"),
  model = "ls",
  data = as.data.frame(legyrr)
)
summary(z.out)
# Number of rows in the data passed to the model.
nrow(legyrr)

# Model 2 (Republicans): same specification as Model 1 with pvote added as a
# regressor; least-squares ("ls") fit with "vcovHAC" robust standard errors.
z.out <- zelig(
  pred_np ~ semicl + semiop + open + nonpart + pvote + icpsrst + yrcnt,
  robust = list(method = "vcovHAC"),
  model = "ls",
  data = as.data.frame(legyrr)
)
summary(z.out)
# Number of rows in the data passed to the model.
nrow(legyrr)

